Done!
Done!
# access the MASS package
library(MASS)
# load the data
data("Boston")
# pass Boston to another object for easy typing
bos <- Bostonlibrary(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.8
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
#explore structure
str(bos)## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
#explore dimensions
dim(bos)## [1] 506 14
#generate a codebook
# string is copy from dataset introduction
codebook <- data.frame(variable = "CRIM - per capita crime rate by town/ZN - proportion of residential land zoned for lots over 25,000 sq.ft./INDUS - proportion of non-retail business acres per town./CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)/NOX - nitric oxides concentration (parts per 10 million)/RM - average number of rooms per dwelling/AGE - proportion of owner-occupied units built prior to 1940/DIS - weighted distances to five Boston employment centres/RAD - index of accessibility to radial highways/TAX - full-value property-tax rate per $10,000/PTRATIO - pupil-teacher ratio by town/B - 1000(Bk-0.63)^2 where Bk is the proportion of blacks by town/LSTAT - % lower status of the population/MEDV - Median value of owner-occupied homes in $1000's")
codebook <- codebook %>%
separate_rows(variable, sep = "/") %>% # "/" is the delimiter for rows
separate(variable, sep = " - ", #" - " is the delimiter for variables
into = c("name", "description"), # names of sparated variables
remove = T) #remove old column
#check codebook
library(DT)
codebook %>% datatable The data set has 506 observations of 14 variables. Variable CHAS is a dummy variable where 1 means the the place having tract that bounds river, and 0 means otherwise. It needs to be converted to a factor.
bos <- bos %>%
mutate(chas = chas %>%
factor() %>%
fct_recode(
"With tracts that bonds river" = "1", #old value 1 to new label
"Otherwise" = "0") # old value 0 to new label
)Each of the 506 rows in the dataset describes a Boston suburb or town, and it has 14 columns with information such as average number of rooms per dwelling, pupil-teacher ratio, and per capita crime rate. The last row describes the median price of owner-occupied homes.
library(GGally)## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
my.fun <- function(data, mapping, method = "lm",...){
p <- ggplot(data = data, mapping = mapping) +
geom_point(size = 0.3, color = "blue",...) +
geom_smooth(size = 0.5, color = "red", method = method)
p
}
names1 <- pull(codebook[1:7,], description)
names1 <- sapply(names1, function(x) paste(strwrap(x, 35), collapse = "\n"))
ggpairs(bos,
lower = list(
continuous = my.fun,
combo = wrap("facethist", bins = 20)),
col = 1:7,
columnLabels = names1)## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
Visualized relations of Boston dataset, variable #1~#7
names2 <- pull(codebook[8:14,], description)
names2 <- sapply(names1, function(x) paste(strwrap(x, 35), collapse = "\n"))
ggpairs(bos,
lower = list(
continuous = my.fun),
col = 8:14,
columnLabels = names2,
)## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
Visualized relations of Boston dataset, variable #8~#14
library(finalfit)
#summarise the coninuous data
ff_glimpse(bos)$Continuous %>% datatable# summarise the categorical data
ff_glimpse(bos)$Categorical %>% datatablebos %>% ggplot(aes(x= crim)) +
geom_boxplot()There are 13 continuous variables in the dataset. The crime rate of the town was 0.3(0.1~3.7)%; the proportion of a town’s residential land zoned for lots over 25,000 sq.ft. was 0 (0~12.5)%; the proportion of non-retail business acres per town was 9.7(5.2~18.1)%; the nitric oxides concentration was 0.5(0.4~0.6) parts per 10 million; the average number of rooms per dwelling was 6.3±0.7 rooms; the proportion of owner-occupied units built prior to 1940 was 77.5(45.0~94.1)%; the weighted distances to five Boston employment centres was 3.2 (2.1~5.2) kilometers; the index of accessibility to radial highways was 5(4~24) units of accessibility; the full-value property-tax rate was $330(279~666) per $10,000; the pupil-teacher ratio by town was 19.1(17.4~20.2); the Black proportion of population after taking the formula of 1000(Bk-0.63)^2 was 391.4(375.4~396.2); the proportion of population that is lower status was 11.4(6.9~17.0)%; the median value of owner-occupied homes was $21.2(17~25)*1000. #### 3.3.2 interpreting categorical variable
35(6.9%) towns have tracts that bonds Charles River.
to be supplemented.
library(MASS)
bos.s <- as.data.frame(scale(bos[-4])) # there is no need to scale categorical data, so it was held out for scaling
# bos.s means Boston Scaled
bos.s$chas <- bos$chas # add categorical variable back for further analysisff_glimpse(bos.s)$Con %>% datatableAll the numeric variables after scaling had a mean of 0 and most of variables’ values ranged from -4 and 4, only except for variables crim, which might be due to outliers.
#generate cutoff according to quantile
bins <- quantile(bos.s$crim)
#generate a categorical variable "crime" and re-code it
bos.s <- bos.s %>%
mutate(crime = crim %>%
cut(breaks = bins, include.lowest = TRUE) %>%
fct_recode("Low" = "[-0.419,-0.411]",
"MediumLow" = "(-0.411,-0.39]",
"MediumHigh" = "(-0.39,0.00739]",
"High" = "(0.00739,9.92]"))
#remove crim
bos.s <- bos.s %>% select(-crim)set.seed(2022)
#generate an object containing the number of obs.
n <- nrow(bos.s)
#
ind <- sample(1:n, size = n*0.8)
#
train <- bos.s[ind,]
test <- bos.s[-ind,]lda.fit <- lda(crime ~ ., data = train) # the function for lda biplot arrows
lda.arrows <- function(x, myscale = 1, arrow_heads = 0.1, color = "red", tex = 0.75, choices = c(1,2)){
heads <- coef(x)
arrows(x0 = 0, y0 = 0,
x1 = myscale * heads[,choices[1]],
y1 = myscale * heads[,choices[2]], col=color, length = arrow_heads)
text(myscale * heads[,choices], labels = row.names(heads),
cex = tex, col=color, pos=3)
}
# target classes as numeric
classes <- as.numeric(factor(train$crime))
#plot the lda results
plot(lda.fit, dimen = 2, pch = classes, col = classes)+
lda.arrows(lda.fit, myscale = 4)## integer(0)
#save
classes.test <- test$crime
#remove old
test$crime <- NULLpredicted.test <- predict(lda.fit, test)a <- table(correct = classes.test, predicted = predicted.test$class )
a## predicted
## correct Low MediumLow MediumHigh High
## Low 4 7 0 0
## MediumLow 10 16 4 0
## MediumHigh 2 11 17 1
## High 0 0 0 30
correct.n = 0
for (i in 1:4){
correct.c <- a[which(rownames(a) == colnames(a)[i]), i]
correct.n = correct.c+ correct.n
}
correct.n/(nrow(bos.s)*0.2)## [1] 0.6620553
#reload Boston
data("Boston")
bos <- Boston
#standardize the dataset
bos.s <- scale(bos)dis_eu <- dist(bos)
summary(dis_eu)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.119 85.624 170.539 226.315 371.950 626.047
bos.s.km <- kmeans(bos.s, centers = 4) set.seed(22)
# determine the number of clusters
k_max <- 10
# calculate the total within sum of squares
twcss <- sapply(1:k_max, function(k){kmeans(bos.s, k)$tot.withinss})
# visualize the results
qplot(x = 1:k_max, y = twcss, geom = 'line')km <- kmeans(bos.s, centers = 3)ggpairs(bos,
lower = list(
continuous = wrap("points", size = 0.3, color = factor(km$cluster)),
combo = wrap("facethist", bins = 20)
),
col = 1:7)ggpairs(bos[8:14],
lower = list(
continuous = wrap("points", size = 0.3, color = factor(km$cluster),
combo = wrap("facethist", bins = 20))))## Warning: Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
## Ignoring unknown parameters: combo
bos %>% ggplot(aes(x = black, y = medv, color = factor(km$cluster))) +
geom_point() +
geom_abline(intercept = 40, slope = -0.1)+
geom_abline(intercept = 30, slope = -0.01) +
stat_ellipse(geom = "polygon",
aes(fill = km$cluster),
alpha = 0.25)bos %>% ggplot(aes(x = nox, y = age, color = factor(km$cluster))) +
geom_point() +
geom_abline(intercept = 140, slope = -120)+
geom_abline(intercept = 230, slope = -250) +
stat_ellipse(geom = "polygon",
aes(fill = km$cluster),
alpha = 0.25)bos %>% ggplot(aes(x = dis, y = black, color = factor(km$cluster))) +
geom_point() +
geom_abline(intercept = 480, slope = -25)+
geom_abline(intercept = 400, slope = -80) +
stat_ellipse(geom = "polygon",
aes(fill = km$cluster),
alpha = 0.25)km <- kmeans(scale(Boston), centers = 3)bos.s <- as.data.frame(scale(Boston))
bos.s$km.cluster <- km$cluster
lda.km <- lda(km.cluster ~ ., data = bos.s) # target classes as numeric
classes <- as.numeric(factor(bos.s$km.cluster))
#plot the lda results
plot(lda.km, dimen = 2, pch = classes, col = classes)+
lda.arrows(lda.km, myscale = 4)## integer(0)
bos.s <- as.data.frame(scale(Boston))
#generate cutoff according to quantile
bins <- quantile(bos.s$crim)
#generate a categorical variable "crime" and re-code it
bos.s <- bos.s %>%
mutate(crime = crim %>%
cut(breaks = bins, include.lowest = TRUE) %>%
fct_recode("Low" = "[-0.419,-0.411]",
"MediumLow" = "(-0.411,-0.39]",
"MediumHigh" = "(-0.39,0.00739]",
"High" = "(0.00739,9.92]"))
#remove crim
bos.s <- bos.s %>% select(-crim)
set.seed(2022)
#generate an object containing the number of obs.
n <- nrow(bos.s)
#
ind <- sample(1:n, size = n*0.8)
#
train <- bos.s[ind,]
test <- bos.s[-ind,]model_predictors <- dplyr::select(train, -crime)
# check the dimensions
dim(model_predictors)## [1] 404 13
dim(lda.fit$scaling)## [1] 13 3
str(model_predictors)## 'data.frame': 404 obs. of 13 variables:
## $ zn : num -0.487 -0.487 -0.487 -0.487 -0.487 ...
## $ indus : num -0.7196 1.015 -0.0797 -0.1803 -1.1511 ...
## $ chas : num -0.272 -0.272 -0.272 -0.272 -0.272 ...
## $ nox : num -0.4375 1.3661 -0.5669 -0.0923 -0.8172 ...
## $ rm : num 1.25 -0.109 -0.56 -1.867 -0.2 ...
## $ age : num 0.402 0.939 -1.644 -1.093 -1.292 ...
## $ dis : num -0.2751 -0.7469 0.0714 -0.6058 0.9871 ...
## $ rad : num -0.178 1.66 -0.637 -0.637 -0.637 ...
## $ tax : num -0.601 1.529 -0.779 -0.618 0.129 ...
## $ ptratio: num -0.4876 0.8058 0.0667 -0.0257 -0.7185 ...
## $ black : num 0.1687 -2.8046 0.4406 -0.0682 0.1303 ...
## $ lstat : num -0.88125 0.35246 -0.24969 -0.00183 -0.49895 ...
## $ medv : num 0.98587 -1.17785 0.00731 -0.69944 -0.29714 ...
str(lda.fit$scaling)## num [1:13, 1:3] 0.0862 -0.0208 0.4024 -0.1155 0.2563 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:13] "zn" "indus" "nox" "rm" ...
## ..$ : chr [1:3] "LD1" "LD2" "LD3"
# matrix multiplication
matrix_product <- as.matrix(model_predictors) %*% lda.fit$scaling
matrix_product <- as.data.frame(matrix_product)
as.matrix(model_predictors) %*% lda.fit$scaling## LD1 LD2 LD3
## 228 -1.14003053 -1.06380834 1.176797380
## 435 -2.52084943 -0.81709800 0.402381513
## 206 0.01498784 0.42819764 0.757184324
## 311 -2.26434057 0.48160729 0.677912129
## 331 3.15176185 1.60723749 -0.300913367
## 196 2.51878869 1.66407861 -2.356875243
## 262 -2.90725169 -1.65843136 0.323960426
## 191 3.86954230 2.14188542 -1.579166391
## 504 -2.81287613 -1.29776557 0.539263007
## 476 -1.91153258 -1.20778908 1.916110767
## 123 -2.92322169 -1.41236818 0.566274735
## 233 -0.25364273 -1.70822625 1.195679048
## 270 1.64700326 -1.51127399 -5.472023975
## 248 6.20029559 2.74665868 -0.912915654
## 7 2.54856697 1.28312419 -0.313999554
## 349 6.96575880 4.49143339 -3.626055180
## 439 -2.73599958 -1.27307377 0.072562197
## 112 -1.63425095 -0.86491112 0.680325470
## 470 -1.34111835 -0.39715169 2.927049290
## 453 -1.83535544 -1.09019202 2.379955444
## 1 0.36738817 0.76119242 -0.776505449
## 307 -1.16603668 -0.37199137 0.143016485
## 288 5.36601577 3.42244681 -1.932651594
## 449 -1.92864870 -1.06505046 2.423769725
## 354 12.25562958 6.27574298 -4.372718690
## 378 -2.88616563 -1.97035414 2.394499183
## 359 -0.20066843 -3.12556362 -2.583875869
## 157 -3.86684766 -0.47286473 -1.854035599
## 12 3.65751219 1.63620753 -0.639488618
## 463 -1.45294101 -0.89923913 2.547017545
## 316 0.03252007 0.53298999 0.421513870
## 428 -3.11041755 -0.84356316 -0.045505468
## 454 -1.43305200 -1.83165661 1.986845936
## 447 -2.36562562 -1.23615665 1.901274257
## 480 -2.66921101 -1.27459093 2.883403380
## 150 -3.51602432 -1.11514336 -0.140454913
## 197 5.57024031 3.38466431 -3.760732049
## 11 4.23885368 1.30666814 -1.054851960
## 107 -2.52915649 -0.69185686 1.096997341
## 276 0.46759717 0.97337894 -0.739310928
## 62 4.47852316 2.32515546 -0.674010806
## 72 2.05030755 1.12787135 0.362890197
## 3 1.24862049 -0.17073174 0.448467802
## 452 -1.75500199 -1.33350167 2.015039999
## 285 5.50539568 3.70972876 -4.075516333
## 194 3.68362650 2.73138030 -2.511392127
## 290 5.45620503 3.10983059 -2.169215454
## 465 -1.14240508 -0.68140063 2.696210709
## 256 8.28255396 5.48583083 -4.149237820
## 127 -3.18057685 -1.48720703 0.358774592
## 234 -0.80183408 -1.98331418 1.525618749
## 423 -3.01630507 -0.87633538 2.464629420
## 353 10.42196606 5.87181987 -3.408385527
## 287 8.05898987 5.07069990 -4.401672650
## 56 7.26929455 4.21727015 -3.467675035
## 486 0.44443718 -0.10635599 2.469947846
## 382 -2.64775897 -1.64957852 2.337880390
## 19 -0.65816354 0.76330109 0.217402320
## 497 -1.68802964 -0.16312736 1.037804782
## 306 -0.77317819 0.42789123 0.097624384
## 425 -2.93703686 -0.40453822 0.180522495
## 361 -2.21311378 -1.05130717 2.650237427
## 493 -2.65725513 -0.93342755 0.221422758
## 464 -1.41772886 -0.93069404 2.499685014
## 32 0.37022503 0.37899059 0.224421296
## 120 -1.61684211 -0.10644873 0.802431702
## 417 -2.61082823 -1.53923380 -0.458324059
## 99 -0.93481333 -1.32612755 0.997787488
## 218 -0.88153295 -0.85801812 0.766345041
## 467 -1.98839852 -0.56735055 0.242218661
## 438 -2.84898008 -1.17610723 -0.348148202
## 400 -2.62517311 -1.22354908 1.904675400
## 87 0.77740452 0.57962278 0.594563076
## 319 -0.60128110 -0.24667791 0.586109649
## 351 7.30992438 3.82926197 -2.346088527
## 389 -2.99749213 -0.80181392 2.646083495
## 336 2.89669695 1.50208619 0.533171300
## 42 2.72487097 0.93853241 0.153966870
## 243 4.92536865 2.42238514 -1.269449992
## 461 -1.63581826 -1.17630282 1.356443820
## 344 3.03888995 2.39472332 -2.076122984
## 24 0.34172831 0.28960793 0.408871264
## 335 3.65044772 1.58898518 0.270946119
## 146 -3.64546871 -1.62528049 -1.589359083
## 481 -0.45077187 -0.45799959 2.826302166
## 488 -0.89240592 -0.25843426 2.767736264
## 237 1.98602800 -2.47033154 -4.349764322
## 110 -1.96708020 -0.68215253 0.865632420
## 6 2.82988498 1.12321979 0.473540614
## 289 5.40950062 3.30526051 -2.011633586
## 247 6.13826900 2.92188956 -0.608930329
## 279 0.38650941 1.16333335 -0.742977304
## 104 -1.77653679 -0.45578715 0.890345960
## 282 1.91915386 0.73436272 -0.040691230
## 207 0.60865091 0.14880302 0.590064974
## 33 0.19078098 0.07801734 -0.788695194
## 95 -0.28969455 0.48666194 -0.532542479
## 140 -3.29835305 -1.43334424 0.612804130
## 366 -4.67453524 0.30629137 3.945198823
## 313 -1.63868907 -0.37425756 0.703481877
## 267 -2.42836183 -1.14563083 -0.152749359
## 329 2.17337091 1.48223596 0.003809728
## 35 -0.27577041 0.02260238 -0.622622375
## 291 2.00010088 2.65241270 -2.545469615
## 403 -2.65543941 -1.51087205 2.262565420
## 224 -1.12484684 -0.68392283 1.442959749
## 239 3.68210560 2.18800076 -0.995635885
## 102 -1.70366245 -0.78853778 1.001752295
## 16 0.60237688 0.84339924 0.631255218
## 53 4.44256410 2.17296306 -0.780839951
## 383 -2.91995135 -1.05713207 2.660716773
## 177 -0.49726987 0.22645235 0.804996801
## 459 -1.40119524 -0.75001338 1.500346270
## 337 1.80228144 1.19254004 0.678102485
## 348 7.28588009 4.83090183 -3.934627248
## 373 -3.80191517 -4.54279542 -1.015266798
## 15 0.63938408 0.53603691 0.360627903
## 114 -1.77204471 -0.66699103 0.712496168
## 187 -1.71562902 -1.83638517 1.529850250
## 309 -0.97772444 -0.39086251 0.580472975
## 221 0.13223857 -3.40809567 -4.083504323
## 200 5.82804956 4.22473454 -3.931762434
## 81 2.22686364 1.36324465 -0.443203757
## 365 -0.55713955 -5.01494711 -3.574000497
## 496 -1.67908163 -0.14968571 1.078908376
## 201 5.94472762 4.19860957 -4.166462406
## 295 2.49507988 0.96162266 0.179209392
## 130 -2.88137167 -0.81823231 0.566611218
## 396 -2.57874709 -1.44511333 2.389260017
## 29 0.80770796 0.14075053 0.264580591
## 310 -1.29927816 -0.12600958 0.711261178
## 443 -2.53334827 -1.30514804 2.639918167
## 260 -2.84455573 -0.89632834 0.015847095
## 303 2.67761435 1.76257483 -0.830487428
## 38 -0.06850574 0.59980908 0.912946016
## 69 4.01861754 2.38888550 -0.398115108
## 46 1.76692031 1.27679382 0.395685865
## 20 -0.38036426 0.48673442 0.701243753
## 77 0.36621089 0.06245796 0.342499195
## 26 0.60190476 0.81883006 -0.196374960
## 278 2.93018621 -1.04778216 -6.133839931
## 34 -0.18154723 0.34089047 0.226994228
## 499 -2.24392457 -0.51808894 1.013996155
## 315 -0.54139004 -0.41074476 0.553481948
## 193 3.94019818 2.05790170 -1.591290474
## 430 -2.59484929 -1.25049884 0.017601490
## 368 -4.79288155 0.02264942 2.270820582
## 301 6.41140527 3.75763746 -3.264548794
## 113 -2.14138575 -0.60808280 0.817596749
## 259 -3.07386853 -1.48720703 0.078569151
## 314 -1.07881671 -0.24341171 0.626701481
## 456 -2.20098071 -1.03743718 0.007559974
## 242 3.92590585 2.22745838 -0.979244488
## 160 -3.44711534 -1.43553010 0.018281193
## 263 -2.64038698 -2.30904762 0.238736672
## 269 -1.77658429 -1.00086776 0.314385348
## 5 2.80668042 0.42954389 0.581065710
## 223 0.70695276 -3.14863583 -4.123657917
## 151 -3.56217667 -1.39670368 0.119180674
## 305 0.09915639 0.22653790 0.099425812
## 79 1.80023413 0.59266545 0.277318521
## 63 4.94541596 2.34935701 -0.487968937
## 179 -1.55126071 -0.86324596 0.972550804
## 257 3.54990812 2.63814445 -3.096846981
## 498 -1.49085585 -0.10561507 0.847247018
## 502 -2.29557626 -0.87866927 0.474364434
## 168 -2.57662933 -0.74760164 -0.468553943
## 230 -0.99438846 -0.25052879 1.409364477
## 139 -3.17108330 -1.18606827 0.458141587
## 122 -2.79176717 -1.22754345 0.534164395
## 173 -2.07297564 -0.26534778 1.185123130
## 210 1.43881510 -2.21003768 -4.489534190
## 78 0.41254717 0.32046493 0.509490760
## 302 2.86943542 1.71605915 -0.991358833
## 163 -1.87635393 -5.16850503 -4.015476188
## 131 -2.71878153 -1.28256969 0.493197927
## 416 -2.64873476 -1.43973653 -0.329038483
## 421 -2.77179449 -1.37269531 2.058699120
## 500 -2.24983267 -0.21394389 0.987669452
## 181 -2.00737356 -1.91364508 1.183308545
## 55 5.44685669 4.28307256 -3.296814150
## 133 -2.64718817 -1.19483599 0.606855604
## 358 -0.37183876 -3.46056397 -2.716466735
## 413 -3.95513960 -0.91392531 0.976593312
## 450 -2.06849734 -1.22279285 1.693625845
## 83 2.23698876 1.70419767 -0.448437603
## 293 1.98297432 2.79911959 -2.499683864
## 341 1.21205398 0.91475466 0.704098455
## 67 4.54730919 4.23208944 -3.270895559
## 145 -3.63537451 -0.90604808 0.226908322
## 483 -0.37299700 -0.99928633 2.641893017
## 343 3.91970938 1.60529112 -1.045494448
## 91 -1.18371265 -0.23161719 0.503007989
## 236 -0.42779235 0.03049503 1.106881406
## 334 3.57018268 1.57186523 0.348285353
## 136 -2.63497850 -1.30394404 0.457481057
## 401 -2.43537230 -1.26270200 2.213136842
## 231 -0.44622935 0.04494985 1.166769166
## 121 -2.75482642 -1.10968006 0.718900361
## 166 -2.81778714 -0.99245892 -0.360943685
## 355 9.97374252 6.43564155 -3.831759469
## 111 -1.67601005 -0.38167805 0.952920388
## 48 2.89055900 0.92619543 -0.051811812
## 338 2.38707072 1.31102904 0.523236538
## 18 0.44757738 0.40168087 0.425863315
## 175 -1.85137223 -0.13926869 1.034330204
## 266 -2.86359985 0.21444616 0.112238111
## 240 3.70515723 1.98246156 -1.029618654
## 346 6.19532691 2.60609207 -0.600369133
## 189 1.19911580 1.69288416 -1.313494214
## 132 -2.56045249 -1.12081584 0.540609572
## 205 1.81229127 1.62849534 -2.876276349
## 308 -1.01260075 0.18157665 0.080606731
## 277 2.94625697 -1.52631945 -6.305333274
## 326 2.13921495 1.01967606 0.549371802
## 283 3.16923541 -2.38584264 -5.154476087
## 148 -3.57226045 -0.95260992 0.289375503
## 398 -2.72785848 -0.95802306 2.446682295
## 134 -2.35451581 -0.73205401 0.552628592
## 61 4.96662247 2.73464561 -0.444843645
## 58 6.84508639 4.66619303 -4.067601202
## 176 -1.24231020 -0.30714487 1.009026553
## 273 0.02516274 0.48450096 -0.173077524
## 441 -2.42724431 -0.99396754 2.378507238
## 105 -2.26258682 -0.61849338 0.978310173
## 380 -2.92504355 -1.50610551 2.427267061
## 399 -2.66843047 -1.06271111 2.377638293
## 138 -2.98493246 -1.40128451 0.435553508
## 92 -1.19712020 -0.22014746 0.494762260
## 208 0.57091680 0.25509652 0.625781083
## 397 -2.58342513 -1.46599183 2.412439293
## 188 0.08685456 0.97795580 -1.058325447
## 37 -0.88388151 0.20665771 0.856274078
## 436 -2.41221151 -1.44635753 0.324723239
## 59 5.75180825 2.96728060 -0.507445177
## 318 0.15455832 0.29580392 0.516839862
## 238 0.31985840 -0.65743309 1.015956562
## 457 -2.19785735 -0.58563828 -0.157573897
## 479 -2.01394106 -1.11222253 2.478322216
## 342 4.76050824 1.98364951 -1.815609377
## 172 -2.25424482 -0.73090851 0.107335902
## 31 0.58772343 0.43094655 0.087162591
## 80 0.95807533 0.78211175 0.555671630
## 220 0.75062496 -2.90587282 -4.677968649
## 268 -2.35987105 -2.16092680 0.358511441
## 235 1.07746687 -2.79690887 -4.268853061
## 332 4.39026957 3.06363302 -1.922149126
## 101 -1.86770240 -0.91546215 1.072630173
## 170 -2.42392262 -1.19751374 -0.018550297
## 312 -2.00836336 -0.27458122 0.864201908
## 472 -0.82498270 -0.67645314 2.727782692
## 333 4.26778811 2.95433599 -2.111459197
## 294 2.47904337 0.97566109 0.242796966
## 135 -2.65470583 -0.70287793 -0.360566046
## 369 -5.47930209 -1.41487283 4.826389415
## 429 -2.70440737 -1.04746326 0.408040594
## 161 -1.86541177 -3.60388298 -4.834865036
## 8 3.06669425 0.89106603 -0.283735898
## 39 -0.24874755 0.37137091 1.029006029
## 202 3.96165298 3.86563690 -3.595153825
## 473 -1.16620851 -1.00031863 2.778840011
## 47 1.86259346 1.06543087 0.365365418
## 491 -2.75877440 -0.65424219 -0.565370420
## 217 0.36363669 -2.65455694 -4.469679466
## 180 -2.03780769 -1.08849672 1.344010638
## 466 -1.14880713 -0.22312608 2.348921192
## 444 -2.32163445 -1.46225550 2.334076576
## 73 2.00556670 1.19911145 0.491872619
## 272 0.73476976 1.11996636 -0.134583291
## 503 -2.69918923 -0.62093278 0.642967667
## 484 0.46501620 0.33596922 2.763991633
## 129 -3.06291219 -1.46093725 0.508057982
## 299 6.32460841 4.33591904 -3.330740160
## 255 8.22641761 5.33044293 -4.179948860
## 474 -1.87641467 -1.61704640 2.822822536
## 96 -0.80772849 -0.22796139 0.464632684
## 446 -2.72676403 -1.35136409 -0.097122211
## 275 1.76395454 -1.36327763 -5.940672448
## 49 3.25235267 1.07497994 -0.010655066
## 407 -4.06010179 -0.34446049 3.168205048
## 25 0.72204473 0.46621445 0.372480092
## 144 -3.55621943 -1.28474254 0.204261455
## 246 6.13934516 2.94260609 -0.729903440
## 394 -2.62706294 -1.16845048 2.551752748
## 433 -2.53832627 -0.92913570 0.690780915
## 149 -3.59681795 -1.13460578 0.111553126
## 52 4.44276914 2.26322033 -0.878662472
## 440 -2.63936778 -0.99965965 2.598856463
## 183 -2.29928809 -1.42734827 1.337036504
## 167 -3.40276975 -2.96778774 1.082824805
## 106 -2.74025669 -0.70373308 1.119644196
## 475 -1.89649746 -0.45832632 2.520478602
## 487 -0.17252410 -0.38550544 2.549938894
## 265 -3.04833986 -1.37783632 0.165672076
## 410 -3.68012196 -2.30685175 1.707635474
## 182 -2.55591561 -0.77745675 1.626789684
## 174 -1.82370681 -0.61062680 0.914246362
## 420 -2.67891142 -1.45653909 -0.284977730
## 363 -2.98562694 -0.55356760 2.950425128
## 281 0.91052452 -0.49970467 0.224310513
## 489 -3.00181839 -0.75704572 0.208760583
## 225 -1.76329619 -2.28998851 1.565291776
## 4 2.83515884 0.72919343 0.500771006
## 66 4.48245496 4.00489106 -3.230619828
## 364 -1.30631641 -3.22756138 -2.857446111
## 414 -3.59186301 -0.80044017 1.975840576
## 460 -1.57293335 -0.79809446 2.656674783
## 214 -0.11618997 -0.05802293 0.780929280
## 345 3.88756330 2.60370382 -2.010040814
## 451 -2.38783782 -1.20360495 -0.405012205
## 384 -3.01505761 -1.13171630 2.718086947
## 195 3.62063383 2.91017887 -2.635087150
## 432 -2.39697047 -1.50425325 0.321996221
## 203 3.72381757 2.57007929 -3.235697053
## 94 -0.36261953 0.74368143 -0.315537856
## 219 0.12696490 -3.04990027 -4.516308303
## 468 -1.71225748 -1.03984461 2.376680585
## 404 -2.75063793 -0.64273433 2.592156175
## 292 1.75129015 2.17555161 -2.236434689
## 458 -1.99098790 -0.38947646 -0.183850563
## 455 -2.16895994 -1.20365284 -0.330703927
## 190 1.23760134 1.04527390 -1.213084490
## 85 1.29510389 0.54861469 0.473923746
## 169 -2.82569609 -1.25636774 -0.094005248
## 391 -2.64038443 -0.91143538 2.723443479
## 41 2.23060572 2.51115244 -2.374114686
## 501 -1.94298261 -0.44594486 0.779929021
## 322 0.82974711 0.45192471 0.715513872
## 86 0.67402683 0.20285424 0.581312659
## 40 2.27426460 2.82321264 -2.423456959
## 505 -2.46190047 -1.02918341 0.438943233
## 198 5.63268550 3.45968269 -4.141396835
## 448 -2.01014754 -1.02422984 2.264856924
## 412 -3.49578912 -1.81650228 0.331655568
## 156 -2.04236667 -3.35835041 -7.351008919
## 477 -1.73768534 -1.32189990 2.540447716
## 192 4.04526886 2.45758507 -1.724344086
## 250 5.95046040 2.50966539 -0.658567239
## 147 -4.00680144 -0.95939906 -1.294673502
## 492 -2.70208592 -1.05286259 -0.100933098
## 115 -2.25732225 -0.64680186 0.693227251
## 51 4.56025287 2.33766365 -0.882488218
## 418 -3.28645804 -0.80590331 0.938865089
## 44 2.66190792 1.26386132 0.312303997
## 70 3.90048700 2.23771427 -0.313240251
## 327 2.14900534 1.05193754 0.536650646
## 437 -2.76898851 -1.07758840 -0.268898604
## 297 3.04250109 0.71327958 0.131911992
## 252 5.20528748 2.66647393 -0.620123451
## 415 -3.31920264 -0.53779906 0.729678555
## 264 -2.49212008 -1.32094484 -0.154180813
## 82 2.28591088 1.34384625 -0.604373678
## 258 -3.28826437 -2.73757007 0.312657375
## 323 1.11258481 0.83742546 0.663856292
## 204 1.82811852 1.75922662 -2.871857594
## 249 5.84945027 2.52500475 -0.765602602
## 506 -2.09695413 -0.15777771 0.226291542
## 103 -2.34725611 -0.40656734 -1.327516795
## 23 0.26421968 0.06776781 0.367326419
## 317 0.24145906 0.13512209 0.330044911
## 411 -4.20531021 -0.85636523 0.442220810
## 321 0.83237791 0.39524807 0.727439956
## 462 -1.61496518 -1.00271474 2.439488293
## 93 -0.22552031 0.49191820 -0.511202531
## 300 6.34448927 3.72348837 -3.258933257
## 65 7.68843707 2.45750925 -0.940196228
## 171 -2.18358627 -0.70016313 -0.358594559
## 74 2.06764678 1.01078830 0.351561607
## 339 1.21140049 0.91191474 0.752807597
## 209 1.96143049 -2.19747725 -4.687832893
## 45 2.69034706 1.25779257 0.164812761
## 286 5.55827429 3.48515112 -2.904364104
## 22 -0.03476975 0.22365833 0.633959519
## 9 3.69452967 1.29736994 -0.749508135
## 118 -1.64039324 -0.27159867 0.741273769
## 245 5.99191517 3.14359693 -0.835835796
## 251 5.31535899 2.55098619 -0.545310449
## 379 -2.92758779 -1.74592905 2.513738901
## 155 -1.67674703 -3.52381274 -5.758430855
## 154 -3.80666297 -1.07980424 -0.560946415
## 360 -2.07604508 -0.92051577 2.715113510
## 54 4.38766871 2.45885728 -0.688711142
## 89 -0.64875286 -0.47636464 0.321024730
## 419 -3.13787632 -0.92880761 -0.088298503
## 370 -3.21578565 -4.84103769 -1.101076075
## 117 -1.62615232 -0.45506467 0.760157718
## 377 -2.99078303 -1.92901599 2.243758811
## 445 -2.65902713 -1.03516558 1.382750197
## 199 5.54554427 3.25487417 -3.739808533
## 2 1.57048394 0.50984729 0.109698826
## 36 -0.84991672 0.19173860 0.913792923
## 340 1.25400049 0.93741740 0.704377533
## 186 -1.24578346 -0.38422159 1.076796919
## 98 -0.72974539 -1.51666322 0.697983980
## 388 -2.83562742 -0.82848610 2.628903436
## 362 -2.30290293 -1.10439854 2.317094048
## 97 -0.55698293 0.08915451 0.497508923
## 405 -2.77086361 -0.98338269 2.065198505
## 211 1.48775402 -2.44719309 -4.624554439
## 68 3.87073888 2.27953358 -0.263061375
## 216 -0.07217896 0.11794292 0.759099179
## 17 0.53007438 0.85579014 0.690517365
## 14 0.91181180 0.83575373 0.578380613
## 350 7.33575636 3.46606660 -2.384503377
library(plotly)##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p1 <- plot_ly(x = matrix_product$LD1,
y = matrix_product$LD2,
z = matrix_product$LD3,
type= 'scatter3d',
mode='markers',
color = train$crime,
size = 2)
p1model_predictors <- dplyr::select(train, -crime)
train.km <- kmeans(model_predictors, centers = 3)
p2 <- plot_ly(x = matrix_product$LD1,
y = matrix_product$LD2,
z = matrix_product$LD3,
type= 'scatter3d',
mode='markers',
color = factor(train.km$cluster),
size = 1.5)
p2train.crime3 <- train %>%
mutate(crime3 = crime %>%
fct_recode("Medium" = "MediumHigh" ,
"Low" = "MediumLow",
"High" = "High",
"Low" = "Low"))
km.cluster <- factor(train.km$cluster)
levels(km.cluster) <- c("Medium","Low","High")
accuracy.tab <- table(correct = train.crime3$crime3, kmean.pred = km.cluster)
accuracy.tab## kmean.pred
## correct Medium Low High
## Low 10 187 15
## Medium 10 48 37
## High 5 0 92
correct.n = 0
for (i in 1:3){
correct.c <- accuracy.tab[which(rownames(accuracy.tab) == colnames(accuracy.tab)[i]), i]
correct.n = correct.c+ correct.n
}
correct.n## [1] 289
correct.n/(nrow(bos.s)*0.8)## [1] 0.7139328
model_predictors <- dplyr::select(test, -crime)
test.km <- kmeans(model_predictors, centers = 3)
test.crime3 <- test %>%
mutate(crime3 = crime %>%
fct_recode("Medium" = "MediumHigh" ,
"Low" = "MediumLow",
"High" = "High",
"Low" = "Low"))
km.cluster <- factor(test.km$cluster)
levels(km.cluster) <- c("Medium","Low","High")
accuracy.tab <- table(correct = test.crime3$crime3, kmean.pred = km.cluster)
accuracy.tab## kmean.pred
## correct Medium Low High
## Low 13 27 1
## Medium 19 10 2
## High 1 0 29
correct.n = 0
for (i in 1:3){
correct.c <- accuracy.tab[which(rownames(accuracy.tab) == colnames(accuracy.tab)[i]), i]
correct.n = correct.c+ correct.n
}
correct.n## [1] 75
correct.n/(nrow(bos.s)*0.2)## [1] 0.7411067
6.4 Comment on the results